This notebook contains code to replicate analyses reported in CHI submission.
In VISUALIZATION VIBES project Study 2, participants completed an attitutde eliciation survey, asking questions about their attitude toward (5) stimulus images (data visualizations). Each participant was randomly assigned to one of 6 stimulus blocks, each containing 1 image from each of (4) categories (ranging from most abstract to most figural). Each participant started by responding to questions for a single ‘common block’ stimulus (that is thus super-powered as it was seen by all participants). Two participant recruitment pools were used: Prolific, with a smaller set of participants recruited from Tumblr (to replicate results of interview Study 1 with participants sourced from Tumblr).
We start by importing data files previously wrangled in
0_VIBES_S2_wrangling.Rmd.
############## IMPORT REFERENCE FILES
ref_stimuli <- readRDS("data/input/REFERENCE/ref_stimuli.rds")
ref_surveys <- readRDS("data/input/REFERENCE/ref_surveys.rds")
############## IMPORT DATA FILES
# df_data <- readRDS("data/output/df_data.rds")
df_participants <- readRDS("data/output/df_participants.rds")
# df_questions <- readRDS("data/output/df_questions.rds")
# df_sd_questions_wide <- readRDS("data/output/df_sd_questions_wide.rds")
# df_sd_questions_long <- readRDS("data/output/df_sd_questions_long.rds")
df_tools <- readRDS("data/output/df_tools.rds")
df_actions <- readRDS("data/output/df_actions.rds")
# df_graphs_full <- readRDS("data/output/df_graphs_full.rds")
df_graphs <- readRDS("data/output/df_graphs.rds")
############## SETUP Graph Labels
ref_stim_id <- levels(ref_stimuli$ID)
ref_cat_questions <- c("MAKER_ID","MAKER_AGE","MAKER_GENDER")
ref_free_response <- c("MAKER_DETAIL", "MAKER_EXPLAIN", "TOOL_DETAIL", "CHART_EXPLAIN")
ref_conf_questions <- c("MAKER_CONF", "AGE_CONF", "GENDER_CONF", "TOOL_CONF")
ref_sd_questions <- c("MAKER_DESIGN","MAKER_DATA","MAKER_POLITIC",
"MAKER_ARGUE","MAKER_SELF","MAKER_ALIGN","MAKER_TRUST",
"CHART_LIKE", "CHART_BEAUTY", "CHART_INTENT", "CHART_TRUST")
left <- c("professional","professional","left-leaning","confrontational",
"altruistic","does NOT share","untrustworthy",
"NOT at all","NOT at all", "inform", "untrustworthy")
right <- c("layperson","layperson","right-leaning","diplomatic",
"selfish", "DOES share", "trustworthy",
"very much", "very much", "persuade", "trusthworthy")
ref_labels <- as.data.frame(cbind(left,right))
rownames(ref_labels) <- ref_sd_questions
# ref_blocks <- c("block1", "block2", "block3", "block4", "block5", "block6")
ref_blocks <- c(1,2,3,4,5,6)
rm(left,right)
############## SETUP Colour Palettes
#https://www.r-bloggers.com/2022/06/custom-colour-palettes-for-ggplot2/
## list of color pallettes
my_colors = list(
politics = c("#184aff","#5238bf", "#4f4a52" ,"#84649c", "#ff0000"),
blackred = c("black","red"),
greens = c("#ADC69D","#81A06D","#567E39","#2D5D16","#193E0A"),
smallgreens = c("#ADC69D","#567E39","#193E0A"),
olives = c("#CDCEA1","#B8B979","#A0A054","#78783F","#50502A","#35351C"),
lightblues = c("#96C5D2","#61A2B2","#3C8093","#2C6378","#1F4A64"),
darkblues = c("#7AAFE1","#3787D2","#2A73B7","#225E96","#1A4974","#133453"),
reds = c("#D9B8BD","#CE98A2","#B17380","#954E5F","#78263E","#62151F")
)
## function for using palettes
my_palettes = function(name, n, all_palettes = my_colors, type = c("discrete","continuous"), direction = c("1","-1")) {
palette = all_palettes[[name]]
if (missing(n)) {
n = length(palette)
}
type = match.arg(type)
out = switch(type,
continuous = grDevices::colorRampPalette(palette)(n),
discrete = palette[1:n]
)
out = switch(direction,
"1" = out,
"-1" = palette[n:1])
structure(out, name = name, class = "palette")
}
############## RETURNS SD STACKED AND COLORED BY BY X
## LOOP STYLE
multi_sd <- function (df, left, right, x, y, color) {
# g <- ggplot(df, aes(y = .data[[x]], x = {{y}}, color = {{color}}))+
g <- ggplot(df, aes(y = .data[[x]], x = .data[[y]], color = .data[[color]]))+
geom_boxplot(width = 0.5) +
geom_jitter(width = 0.1, alpha=0.5) +
scale_y_continuous(limits=c(-1,101)) +
labs(x="", y="") +
coord_flip() +
guides(
y = guide_axis_manual(labels = left),
y.sec = guide_axis_manual(labels = right)
) + theme_minimal()
return(g)
}
############## RETURNS SINGLE SD
## LOOP STYLE
single_sd <- function (df, left, right, x) {
g <- ggplot(df, aes(y = {{x}}, x = ""))+
geom_boxplot(width = 0.5) +
geom_jitter(width = 0.1, alpha=0.5) +
scale_y_continuous(limits=c(-1,101)) +
labs(x="", y="") +
coord_flip() +
guides(
y = guide_axis_manual(labels = left),
y.sec = guide_axis_manual(labels = right)
) + theme_minimal()
return(g)
}
######## RETURNS SINGLE SD
## APPLY STYLE
# plot_sd = function (data, column, type, split, boxplot) {
#
# ggplot(df, aes(y = .data[[column]], x="")) +
# {if(boxplot) geom_boxplot(width = 0.5) } +
# geom_jitter(width = 0.1, alpha=0.3, {if(split) aes(color=Distribution)}) +
# {if(split) facet_grid(Distribution ~ .)} +
# scale_y_continuous(limits=c(-1,101)) +
# labs(x="", y="") +
# coord_flip() +
# {if(type == "S")
# guides(
# y = guide_axis_manual(labels = ref_labels[column,"left"]),
# y.sec = guide_axis_manual(labels = ref_labels[column,"right"])
# )} +
# {if(type == "Q")
# guides(
# y = guide_axis_manual(labels = ref_labels[q,"left"]),
# y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
# )} +
# theme_minimal() +
# labs (
# caption = column
# )
# }
######## RETURNS SINGLE SD
## APPLY STYLE
plot_sd = function (data, column, type, facet, facet_by, boxplot) {
ggplot(df, aes(y = .data[[column]], x="")) +
{if(boxplot) geom_boxplot(width = 0.5) } +
geom_jitter(width = 0.1, alpha=0.3, {if(facet) aes(color=.data[[facet_by]])}) +
{if(facet) facet_grid(.data[[facet_by]] ~ .)} +
scale_y_continuous(limits=c(-1,101)) +
labs(x="", y="") +
coord_flip() +
{if(type == "S")
guides(
y = guide_axis_manual(labels = ref_labels[column,"left"]),
y.sec = guide_axis_manual(labels = ref_labels[column,"right"])
)} +
{if(type == "Q")
guides(
y = guide_axis_manual(labels = ref_labels[q,"left"]),
y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
)} +
theme_minimal() +
labs (
caption = column
) + easy_remove_legend()
}
## FOR DESCRIPTIVES PARAGRAPH
# #PROLIFIC
df.p <- df_participants %>% filter(Distribution == "PROLIFIC")
desc.gender.p <- table(df.p$D_gender) %>% prop.table()
names(desc.gender.p) <- levels(df.p$D_gender)
p_participants <- nrow(df.p)
# #TUMBLR
df.t <- df_participants %>% filter(Distribution == "TUMBLR")
desc.gender.t <- table(df.t$D_gender) %>% prop.table()
names(desc.gender.t) <- levels(df.t$D_gender)
t_participants <- nrow(df.t)
A total of 318 participants were recruited from US-located English speaking users of Tumblr (n = 78) and Prolific (n = 240).
240 individuals from PROLIFIC participated in Study 2, ( 54% Female, 42% Male, 3% Non-binary, 1% Other).
Note that a higher proportion of participants recruited from Tumblr represent identities other than cis-gender Female and cis-gender Male. 78 individuals from Tumblr participated in Study 2, ( 36% Female, 5% Male, 40% Non-binary, 19% Other).
df <- df_participants
## for descriptives paragraph
p.desc.duration <- psych::describe(df %>% filter(Distribution=="PROLIFIC") %>% pull(duration.min))
t.desc.duration <- psych::describe(df %>% filter(Distribution=="TUMBLR") %>% pull(duration.min))
PROLIFIC SAMPLE (n = 240 ) participant response times ranged from 13.97 to 216.18 minutes, with a mean response time of 42.49 minutes, SD = 21.15.
TUMBLR SAMPLE (n = 78 ) participant response times ranged from 10.88 to 227.57 minutes, with a mean response time of 51.93 minutes, SD = 35.47.
rm(df, df.p, df.t)
In the interest of space, we report analysis of 1 stimulus block (stimulus block == 2; pictured in the teaser image), rather than analysis of all six blocks.
#block 2 participant-level data
df_b2_p <- df_participants %>% filter(Assigned.Block ==2)
#block 2 stimulus-level data
df_b2 <- df_graphs %>%
filter(str_detect(STIMULUS,"B2"))
# %>%
# mutate(MAKER_ID = fct_rev(MAKER_ID))
(n = 52 ) survey respondents answered questions about the (4) stimuli in block 2, yielding (r = 208) observations.
Participants were asked: Who do you think is most likely
responsible for having this image created? The response is
stored as MAKER_ID
#FILTER DATASET
df <- df_b2
## A
## MAKER IDENTIFICATION BY STIMULUS AND SAMPLE
## FACETED HORIZONTAL BAR CHART
A <- ggplot( df, aes( x = (STIMULUS), fill = MAKER_ID)) +
geom_bar(position = "stack") +
facet_grid( Distribution ~ ., scales = "free", space = "free") +
labs( title = "MAKER ID by Stimulus and SAMPLE",
subtitle = "", x = "") +
scale_fill_manual(values = my_palettes(name="reds", direction = "1")) +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
# coord_flip()+
easy_add_legend_title("") +
theme_minimal()
##############################
## B
## MAKER IDENTIFICATION BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate( MAKER_ID = fct_rev(MAKER_ID) )
B <- ggbarstats( data = dx,
x = MAKER_ID, y = STIMULUS,
legend.title = "MAKER ID",
results.subtitle = FALSE) +
scale_fill_manual(values = my_palettes(name="reds", direction = "1")) +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
labs( title = "MAKER ID BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################
## S
## MAKER ID REPEATED MEASURES
## SANKEY DIAGRAM
###FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
order <- dx$MAKER_ID %>% levels #list order of factor levels
ds <- df_b2 %>%
filter(str_detect(STIMULUS, "B2")) %>%
select(STIMULUS, MAKER_ID, PID, Distribution) %>%
#pivot wider
pivot_wider(
names_from = STIMULUS,
values_from = MAKER_ID
) %>%
#prep for geom_sankey
make_long(`B2-1`,`B2-2`,`B2-3`,`B2-4`) %>%
mutate(
match = ifelse(node==next_node, 1, 0.5), # try to highlight throughflows
node = factor(node, levels = order),
next_node = factor(next_node, levels = order)
)
#SANKEY DIAGRAM
S <- ggplot(ds, aes(x = x,
next_x = next_x,
node = node,
next_node = next_node,
fill = node,
))+
geom_alluvial(width = 0.25, flow.alpha = 0.65, node.alpha = 1, node.color = "white") +
geom_alluvial_text(aes( x = as.numeric(x), label = after_stat(freq)),
size = 3, color = "white", fontface = "bold", check_overlap = TRUE) +
scale_fill_manual(values = my_palettes(name="reds", direction = "-1")) +
# scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE, alpha = 1) +
labs(title = "MAKER ID by STIMULUS",
x = "STIMULUS", y = "(count)", fill = "MAKER ID",
caption = "") +
theme_minimal()
##############################
# DISPLAY GRAPHS
B
S
# CLEANUP
rm(df, ds, dx, order)
To explore whether variance in maker identifications is more
likely a function of participant or stimulus , we visualize the
MAKER_ID data via an alluvial diagram. Here we see that
there are no stable identifications (ie. paths through
each stimulus with the same category) indicating that no participants
had a strong preference for a particular identification category.
Rather, participants seem to be identifying the maker category as a
function of some features of the stimuli, rather than a trait/state
inference about makers of all stimuli in general.
Participants were asked: Please rate your confidence in this
choice. The response is stored as MAKER_CONF .
## DESCRIBE MAKER CONFIDENCE
df <- df_b2 %>%
select(PID, STIMULUS, MAKER_ID, MAKER_CONF)
desc.maker_conf <- psych::describe(df$MAKER_CONF)
## H
## Distribution of MAKER_CONF
## GGSTATS HISTOGRAM
H <- gghistostats(df, x = MAKER_CONF,
test.value = 50,
binwidth = 5,
centrality.plotting = TRUE,
centrality.type = "parametric",
bin.args = list(color = "black", fill = "grey50", alpha = 0.2)
) +
labs(title = "Distribution of Maker ID Confidence (Block 2)",
caption = paste0("test value = 50; sd = ", round(desc.maker_conf$sd,0))) +
theme_minimal()
##############################
# DISPLAY GRAPHS
H
# CLEANUP
rm(df)
Across the 208 observations, the 52 participants assigned to block 2 offered confidence values [on their maker identifications] ranging from1 to 100 , with a mean of 64.7. According to (one-sample) student’s t-tests, the mean of the distribution is significantly different from both reference values of 0 (indicating no confidence) and 50% (indicating partial confidence) and 100% (indicating certainty).
#TEMP FILTER DATAFRAME
temp <- df_graphs %>%
filter(str_detect(STIMULUS, "B2")) %>%
select(PID, STIMULUS, MAKER_ID, MAKER_CONF) %>%
group_by(PID) %>%
summarise(
min = min(MAKER_CONF),
max = max(MAKER_CONF),
range = max-min,
low_var = ifelse(range < 20, TRUE, FALSE)
)
#proportion of low variance participants
p_lowvar <- round(nrow(temp %>% filter(low_var))/ nrow(temp)*100,0)
#FILTER DATAFRAME
df <- df_graphs %>%
filter(str_detect(STIMULUS, "B2")) %>%
select(PID, STIMULUS, MAKER_ID, MAKER_CONF) %>%
group_by(PID) %>%
mutate(
min = min(MAKER_CONF),
max = max(MAKER_CONF),
range = max-min,
low_var = ifelse(range < 20, TRUE, FALSE))
## I
## MAKER CONFIDENCE (REPEATED MEASURES)
## WITHIN-SUBJECTS BOXPLOT
I <- ggplot(df, aes(STIMULUS,MAKER_CONF)) +
geom_boxplot(width = 0.3)+
stat_summary(fun.y="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
stat_summary(fun.y="mean", colour="blue", geom="text", fontface = "bold", show_legend = TRUE,
vjust=+0.5, hjust = 1.5, aes( label=round(..y.., digits=0)))+
geom_line(aes(group = PID, color = low_var), linetype=2, size=0.25) +
scale_color_manual(values = my_palettes(name="blackred", type = "discrete", direction="1")) +
geom_point(aes(group=PID), size= 2 ,alpha = 0.2) +
labs(
title = "Confidence in Maker ID",
y = "MAKER ID CONFIDENCE (%)",
x = "Stimulus",
subtitle = paste0("Scores range from ",desc.maker_conf$min, " to ", desc.maker_conf$max, " with ", p_lowvar,"% of ",nrow(temp)," participants (red) showing little variance in confidence across stimuli"),
caption = "lines connect observations from the same participant
red indicates participants with less than 20 points range between confidence scores
(i.e. participants whose confidence is stable, invariant across stimuli)") +
theme_minimal() + easy_remove_legend()
##############################
# MAKER_CONFIDENCE (VANILLA)
# ggplot(df, aes( x = (STIMULUS), y = MAKER_CONF)) +
# geom_boxplot(width = 0.5)+
# geom_jitter(width = 0.10, alpha = 0.4) +
# labs (title = "",
# x = "", y = "MAKER ID CONFIDENCE") +
# theme_minimal()
# DISPLAY GRAPHS
I
# CLEANUP
rm(df, temp)
To explore whether variance in confidence in maker identification differs as a function of stimulus, or as a property of an individual subject, we visualize the distribution of confidence scores for each stimulus, where connecting lines indicate responses for a single participant. Participants with less than 20 point difference between their minimum and maximum confidence scores are indicated in red, ( 31 of the 52 participants.) Note that the participants with low variance in confidence scores are clustered near the high end of the confidence scale, with the exception of two subjects near 50%. The mean confidence score for each stimulus (blue) is greater than 50%, and no participants indicated consistently low confidence across all stimuli, indicating that in general, participants were able to respond thoughtfully to the maker identification question.
df <- df_b2
## MC
## MAKER_CONFIDENCE by IDENTIFICATION
## POSITION DODGE BAR PLOT
MC <- ggplot(df, aes( x = (STIMULUS), y = MAKER_CONF, color = MAKER_ID)) +
geom_boxplot(position=position_dodge(0.8), width = 0.5)+
geom_jitter(position=position_jitterdodge(dodge.width = 0.8, jitter.width= 0.2, ), alpha = 0.4) +
scale_color_manual(values = my_palettes(name="reds", direction = "1")) +
# scale_color_viridis(discrete=TRUE, option="viridis") +
labs (title = "",
x = "", y = "Maker ID Confidence") +
theme_minimal()
###################################################
# DISPLAY GRAPHS
(p <- ( B/MC ) + plot_annotation(
title = 'MAKER ID & CONFIDENCE by STIMULUS',
subtitle = '',
caption = ''))
rm(df, A, B, S, H, I, MC, p)
rm(desc.maker_conf)
Inspecting Maker ID Confidence by Maker ID category, there does
not appear to be any systematic relationship between the category of
Maker a participant chooses for a stimulus, and their confidence in that
choice (i.e. It is not the case that identifications of
MAKER(ID) = ORGANIZATION , for example, are made with
greater or less confidence than identifications of
MAKER(ID) = INDIVIDUAL. Statistical tests of independence
(between confidence and maker id) are ill-advised because the number of
observations of each maker ID vary greatly between stimuli.
Taken together, this analysis of correspondence between
MAKER_ID and MAKER_CONF suggests that both the
inference identifying the kind of maker as well as the participant’s
confidence in that inference are likely conditioned on some features of
the stimulus rather than more stable state/trait(s) of the
participant.
Participants were asked: Take a moment to imagine the
person(s) responsible for creating the image. What
generation are they most likely from? The response was saved as
MAKER_AGE
#FILTER DATASET
df <- df_b2
## A
## MAKER AGE BY STIMULUS AND SAMPLE
## FACETED HORIZONTAL BAR CHART
A <- ggplot( df, aes( x = (STIMULUS), fill = MAKER_AGE)) +
geom_bar(position = "stack") +
facet_grid( Distribution ~ ., scales = "free", space = "free") +
labs( title = "MAKER AGE by Stimulus and SAMPLE",
subtitle = "", x = "") +
scale_fill_manual(values = my_palettes(name="lightblues", direction = "1")) +
#scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
# coord_flip()+
easy_add_legend_title("") +
theme_minimal()
##############################
## B
## MAKER AGE BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate(MAKER_AGE = fct_rev(MAKER_AGE))
B <- ggbarstats( data = dx,
x = MAKER_AGE, y = STIMULUS,
legend.title = "MAKER AGE",
results.subtitle = FALSE) +
scale_fill_manual(values = my_palettes(name="lightblues", direction = "1")) +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
labs( title = "MAKER AGE BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################
## S
## MAKER AGE REPEATED MEASURES
## SANKEY DIAGRAM
###FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
order <- dx$MAKER_AGE %>% levels #list order of factor levels
ds <- df_b2 %>%
filter(str_detect(STIMULUS, "B2")) %>%
select(STIMULUS, MAKER_AGE, PID, Distribution) %>%
#pivot wider
pivot_wider(
names_from = STIMULUS,
values_from = MAKER_AGE
) %>%
#prep for geom_sankey
make_long(`B2-1`,`B2-2`,`B2-3`,`B2-4`) %>%
mutate(
match = ifelse(node==next_node, 1, 0.5), # try to highlight throughflows
node = factor(node, levels = order),
next_node = factor(next_node, levels = order)
)
#SANKEY DIAGRAM
S <- ggplot(ds, aes(x = x,
next_x = next_x,
node = node,
next_node = next_node,
fill = node,
))+
geom_alluvial(width = 0.25, flow.alpha = 0.65, node.alpha = 1, node.color="white") +
geom_alluvial_text(aes( x = as.numeric(x), label = after_stat(freq)),
size = 3, color = "white", fontface = "bold", check_overlap = TRUE) +
scale_fill_manual(values = my_palettes(name="lightblues", direction = "-1")) +
# scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE, alpha = 1) +
labs(title = "MAKER AGE by STIMULUS",
x = "STIMULUS", y = "(count)", fill = "MAKER AGE",
caption = "") +
theme_minimal()
##############################
# DISPLAY GRAPHS
B
S
# CLEANUP
rm(df, ds, dx, order)
To explore whether variance in maker age attributions are more
likely a function of participant or stimulus , we visualize the
MAKER_AGE data via an alluvial diagram. Here we see that
(unlike MAKER_ID) there are a number of stable
identifications (ie. paths through each stimulus with the same category)
indicating that some participants consistently attributed all stimuli to
a particular age category. This occured most often with the ‘Millenial’
and ‘GenX’ age categories. However, this pattern may be confounded by
both the true base rate of maker age (inferred based on working age of
likely makers).
Participants were asked: Please rate your confidence in this
choice. The response is stored as AGE_CONF .
## DESCRIBE MAKER AGE CONFIDENCE
df <- df_b2 %>%
select(PID, STIMULUS, MAKER_AGE, AGE_CONF)
desc.age_conf <- psych::describe(df$AGE_CONF)
## H
## Distribution of AGE_CONF
## GGSTATS HISTOGRAM
H <- gghistostats(df, x = AGE_CONF,
test.value = 50,
binwidth = 5,
centrality.plotting = TRUE,
centrality.type = "parametric",
bin.args = list(color = "black", fill = "grey50", alpha = 0.2)
) +
labs(title = "Distribution of Maker AGE Confidence (Block 2)",
caption = paste0("test value = 50; sd = ", round(desc.age_conf$sd,0))) +
theme_minimal()
##############################
# DISPLAY GRAPHS
H
# CLEANUP
rm(df)
Across the 208 observations, the 52 participants assigned to block 2 offered confidence values [on their maker age attribution] ranging from8 to 100 , with a mean of 62.1. According to (one-sample) student’s t-tests, the mean of the distribution is significantly different from both reference values of 0 (indicating no confidence) and 50% (indicating partial confidence) and 100% (indicating certainty).
#TEMP FILTER DATAFRAME
temp <- df_graphs %>%
filter(str_detect(STIMULUS, "B2")) %>%
select(PID, STIMULUS, MAKER_AGE, AGE_CONF) %>%
group_by(PID) %>%
summarise(
min = min(AGE_CONF),
max = max(AGE_CONF),
range = max-min,
low_var = ifelse(range < 20, TRUE, FALSE)
)
#proportion of low variance participants
p_lowvar <- round(nrow(temp %>% filter(low_var))/ nrow(temp)*100,0)
#FILTER DATAFRAME
df <- df_graphs %>%
filter(str_detect(STIMULUS, "B2")) %>%
select(PID, STIMULUS, MAKER_AGE, AGE_CONF) %>%
group_by(PID) %>%
mutate(
min = min(AGE_CONF),
max = max(AGE_CONF),
range = max-min,
low_var = ifelse(range < 20, TRUE, FALSE))
## I
## MAKER CONFIDENCE (REPEATED MEASURES)
## WITHIN-SUBJECTS BOXPLOT
I <- ggplot(df, aes(x = STIMULUS, y = AGE_CONF)) +
geom_boxplot(width = 0.2) +
stat_summary(fun.y="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
stat_summary(fun.y="mean", colour="blue", geom="text", fontface = "bold", show_legend = TRUE,
vjust=+0.5, hjust = 1.5, aes( label=round(..y.., digits=0)))+
geom_line(aes(group = PID, color = low_var), linetype=2, size=0.25) +
scale_color_manual(values = my_palettes(name="blackred", type = "discrete", direction="1")) +
geom_point(aes(group=PID), size= 2 ,alpha = 0.2) +
labs(
title = "Confidence in Maker AGE",
y = "MAKER AGE CONFIDENCE (%)",
x = "Stimulus",
subtitle = paste0("Scores range from ",desc.age_conf$min, " to ", desc.age_conf$max, " with ", p_lowvar,"% of ",nrow(temp)," participants (red) showing little variance in confidence across stimuli"),
caption = "lines connect observations from the same participant
red indicates participants with less than 20 points range between confidence scores
(i.e. participants whose confidence is stable, invariant across stimuli)") +
theme_minimal() + easy_remove_legend()
##############################
# MAKER_AGE CONFIDENCE (VANILLA)
# ggplot(df, aes( x = (STIMULUS), y = AGE_CONF)) +
# geom_boxplot(width = 0.5)+
# geom_jitter(width = 0.10, alpha = 0.4) +
# labs (title = "",
# x = "", y = "MAKER AGE CONFIDENCE") +
# theme_minimal()
# DISPLAY GRAPHS
I
# CLEANUP
rm(df, temp)
To explore whether variance in confidence in maker age differs as a
function of stimulus, or as a property of an individual subject, we
visualize the distribution of confidence scores for each stimulus, where
connecting lines indicate responses for a single participant.
Participants with less than 20 point difference between their minimum
and maximum confidence scores are indicated in red, ( r*
p_lowvar of the 52 participants.) Note that the
participants with low variance in confidence scores are clustered near
the high end of the confidence scale, with the exception of (2)
participants who have low variance scores clustered around 50% and (2)
participants clustered around 25%. The mean confidence score for each
stimulus (blue) is greater than 50%, and no participants indicated
consistently low confidence across all stimuli, indicating that in
general, participants were able to respond thoughtfully to the maker age
question.
df <- df_b2
## MC
## MAKER AGE CONFIDENCE by IDENTIFICATION
## POSITION DODGE BAR PLOT
MC <- ggplot(df, aes( x = (STIMULUS), y = AGE_CONF, color = MAKER_AGE)) +
geom_boxplot(position=position_dodge(0.8), width = 0.5)+
geom_jitter(position=position_jitterdodge(dodge.width = 0.8, jitter.width= 0.2, ), alpha = 0.4) +
scale_color_manual(values = my_palettes(name="lightblues", direction = "1")) +
# scale_color_viridis(discrete=TRUE, option="viridis") +
labs (title = "",
x = "", y = "Maker Age Confidence") +
theme_minimal()
###################################################
# DISPLAY GRAPHS
(p <- ( B/MC ) + plot_annotation(
title = 'MAKER AGE & CONFIDENCE by STIMULUS',
subtitle = '',
caption = ''))
Inspecting Maker AGE Confidence by Maker AGE category, there does
not appear to be any systematic relationship between the category of
Maker AGE a participant chooses for a stimulus, and their confidence in
that choice (i.e. It is not the case that identifications of
MAKER(AGE) = MILLENIAL , for example, are made with greater
or less confidence than identifications of
MAKER(AGE) = BOOMER. Statistical tests of independence
(between confidence and maker age) are ill-advised because the number of
observations of each maker age vary greatly between stimuli.
Taken together, this analysis of correspondence between
MAKER AGE and MAKER AGE CONFIDENCE suggests
that both the inference identifying the age generation of maker (and the
participant’s confidence in that inference) are conditioned on some
features of the stimulus rather than more stable state/trait(s)
of the participant.
rm(df, A, B, S, H, I, MC, p)
rm(desc.age_conf)
Participants were asked: Take a moment to imagine the
person(s) responsible for creating the image. What
gender do they most likely identify with?
MAKER_GENDER
#FILTER DATASET
df <- df_b2
## A
## MAKER GENDER BY STIMULUS AND SAMPLE
## FACETED HORIZONTAL BAR CHART
A <- ggplot( df, aes( x = (STIMULUS), fill = MAKER_GENDER)) +
geom_bar(position = "stack") +
facet_grid( Distribution ~ ., scales = "free", space = "free") +
labs( title = "MAKER GENDER by Stimulus and SAMPLE",
subtitle = "", x = "") +
scale_fill_manual(values = my_palettes(name="smallgreens", direction = "1")) +
#scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
# coord_flip()+
easy_add_legend_title("") +
theme_minimal()
##############################
## B
## MAKER GENDER BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate(MAKER_GENDER = (MAKER_GENDER))
B <- ggbarstats( data = dx,
x = MAKER_GENDER, y = STIMULUS,
legend.title = "MAKER GENDER",
results.subtitle = FALSE) +
scale_fill_manual(values = my_palettes(name="smallgreens", direction = "1")) +
# scale_fill_viridis(discrete=TRUE, option="viridis", direction = -1) +
labs( title = "MAKER GENDER BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################
## S
## MAKER GENDER REPEATED MEASURES
## SANKEY DIAGRAM
###FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
order <- df_b2$MAKER_GENDER %>% levels #list order of factor levels
ds <- df_graphs %>%
filter(str_detect(STIMULUS, "B2")) %>%
select(STIMULUS, MAKER_GENDER, PID, Distribution) %>%
#pivot wider
pivot_wider(
names_from = STIMULUS,
values_from = MAKER_GENDER
) %>%
#prep for geom_sankey
make_long(`B2-1`,`B2-2`,`B2-3`,`B2-4`) %>%
mutate(
match = ifelse(node==next_node, 1, 0.5), # try to highlight throughflows
node = factor(node, levels = order),
next_node = factor(next_node, levels = order)
)
#SANKEY DIAGRAM
S <- ggplot(ds, aes(x = x,
next_x = next_x,
node = node,
next_node = next_node,
fill = node,
))+
geom_alluvial(width = 0.25, flow.alpha = 0.65, node.alpha = 1, node.color="white") +
geom_alluvial_text(aes( x = as.numeric(x), label = after_stat(freq)),
size = 3, color = "white", fontface = "bold", check_overlap = TRUE) +
scale_fill_manual(values = my_palettes(name="smallgreens", direction = "-1")) +
# scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE, alpha = 1) +
labs(title = "MAKER GENDER by STIMULUS",
x = "STIMULUS", y = "(count)", fill = "MAKER GENDER",
caption = "") +
theme_minimal()
##############################
# DISPLAY GRAPHS
B
S
# CLEANUP
rm(df, ds, dx, order)
Participants were asked: Please rate your confidence in this
choice. The response is stored as GENDER_CONF
.
## DESCRIBE MAKER GENDER CONFIDENCE
df <- df_b2 %>%
select(PID, STIMULUS, MAKER_GENDER, GENDER_CONF)
desc.gender_conf <- psych::describe(df$GENDER_CONF)
## H
## Distribution of GENDER_CONF
## GGSTATS HISTOGRAM
H <- gghistostats(df, x = GENDER_CONF,
test.value = 50,
binwidth = 5,
centrality.plotting = TRUE,
centrality.type = "parametric",
bin.args = list(color = "black", fill = "grey50", alpha = 0.2)
) +
labs(title = "Distribution of Maker GENDER Confidence (Block 2)",
caption = paste0("test value = 50; sd = ", round(desc.gender_conf$sd,0))) +
theme_minimal()
##############################
# DISPLAY GRAPHS
H
# CLEANUP
rm(df)
Across the 208 observations, the 52 participants assigned to block 2 offered confidence values [on their maker gender attribution] ranging from0 to 100 , with a mean of 54. According to (one-sample) student’s t-tests, the mean of the distribution is NOT significantly different (at the 0.01 alpha level) from both reference values of 50% (indicating partial confidence). Relative to MAKER ID and MAKER AGE, participants expressed less confidence in their attribution of MAKER GENDER
#TEMP FILTER DATAFRAME
temp <- df_graphs %>%
filter(str_detect(STIMULUS, "B2")) %>%
select(PID, STIMULUS, MAKER_GENDER, GENDER_CONF) %>%
group_by(PID) %>%
summarise(
min = min(GENDER_CONF),
max = max(GENDER_CONF),
range = max-min,
low_var = ifelse(range < 20, TRUE, FALSE)
)
#proportion of low variance participants
p_lowvar <- round(nrow(temp %>% filter(low_var))/ nrow(temp)*100,0)
#FILTER DATAFRAME
df <- df_graphs %>%
filter(str_detect(STIMULUS, "B2")) %>%
select(PID, STIMULUS, MAKER_GENDER, GENDER_CONF) %>%
group_by(PID) %>%
mutate(
min = min(GENDER_CONF),
max = max(GENDER_CONF),
range = max-min,
low_var = ifelse(range < 20, TRUE, FALSE))
## I
## MAKER GENDER CONFIDENCE (REPEATED MEASURES)
## WITHIN-SUBJECTS BOXPLOT
I <- ggplot(df, aes(x = STIMULUS, y = GENDER_CONF)) +
geom_boxplot(width = 0.2) +
stat_summary(fun.y="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
stat_summary(fun.y="mean", colour="blue", geom="text", fontface = "bold", show_legend = TRUE,
vjust=+0.5, hjust = 1.5, aes( label=round(..y.., digits=0)))+
geom_line(aes(group = PID, color = low_var), linetype=2, size=0.25) +
scale_color_manual(values = my_palettes(name="blackred", type = "discrete", direction="1")) +
geom_point(aes(group=PID), size= 2 ,alpha = 0.2) +
labs(
title = "Confidence in Maker GENDER",
y = "MAKER GENDER CONFIDENCE (%)",
x = "Stimulus",
subtitle = paste0("Scores range from ",desc.gender_conf$min, " to ", desc.gender_conf$max, " with ", p_lowvar,"% of ",nrow(temp)," participants (red) showing little variance in confidence across stimuli"),
caption = "lines connect observations from the same participant
red indicates participants with less than 20 points range between confidence scores
(i.e. participants whose confidence is stable, invariant across stimuli)") +
theme_minimal() + easy_remove_legend()
##############################
# MAKER_GENDER CONFIDENCE (VANILLA)
# ggplot(df, aes( x = (STIMULUS), y = GENDER_CONF)) +
# geom_boxplot(width = 0.5)+
# geom_jitter(width = 0.10, alpha = 0.4) +
# labs (title = "",
# x = "", y = "MAKER GENDER CONFIDENCE") +
# theme_minimal()
# DISPLAY GRAPHS
I
# CLEANUP
rm(df, temp)
To explore whether variance in confidence in maker GENDER differs as
a function of stimulus, or as a property of an individual subject, we
visualize the distribution of confidence scores for each stimulus, where
connecting lines indicate responses for a single participant.
Participants with less than 20 point difference between their minimum
and maximum confidence scores are indicated in red, ( r*
p_lowvar of the 52 participants.) Note that the
participants with low variance in confidence scores are not clustered
near the high end of the confidence scale as with confidence in maker ID
and maker AGE … rather, low variance is maker GENDER confidence seems to
be evenly distributed across the confidence scale. The mean confidence
score for each stimulus (blue) are near 50%.
Unlike MAKER ID and MAKER AGE,
confidence in MAKER GENDER is generally more idiosyncratic,
and potentially reflective of an individual participant’s state/trait
inference about maker GENDER in general, rather than responding to
particular features of each stimuli. To verify, we will look for stimuli
that have strong patterns of difference in relative proportion of each
level of the MAKER GENDER variable.
df <- df_b2
## MC
## MAKER GENDER CONFIDENCE by IDENTIFICATION
## POSITION DODGE BAR PLOT
MC <- ggplot(df, aes( x = (STIMULUS), y = GENDER_CONF, color = MAKER_GENDER)) +
geom_boxplot(position=position_dodge(0.8), width = 0.5)+
geom_jitter(position=position_jitterdodge(dodge.width = 0.8, jitter.width= 0.2, ), alpha = 0.4) +
scale_color_manual(values = my_palettes(name="smallgreens", direction = "-1")) +
# scale_color_viridis(discrete=TRUE, option="viridis") +
labs (title = "",
x = "", y = "Maker Gender Confidence") +
theme_minimal()
###################################################
# DISPLAY GRAPHS
(p <- ( B/MC ) + plot_annotation(
title = 'MAKER GENDER & CONFIDENCE by STIMULUS',
subtitle = '',
caption = ''))
Inspecting MAKER GENDER Confidence by
MAKER GENDER category, there does not appear to be any
systematic relationship between the category of Maker GENDER a
participant chooses for a stimulus, and their confidence in that choice
(i.e. It is not the case that identifications of
MAKER(GENDER) = MALE , for example, are made with greater
or less confidence than identifications of
MAKER(GENDER) = FEMALE. Statistical tests of independence
(between confidence and maker age) are ill-advised because the number of
observations of each maker age vary greatly between stimuli.
Taken together, this lack of variance in
GENDER_CONF within-subjects and between stimuli, along with
the limited number of categorical choices for the
MAKER GENDER variable indicates that the inference
identifying the gender generation of the maker (and the participant’s
confidence in that inference) may infact be conditioned on some
relatively stable state/trait(s) of the individual participant, rather
than features of the stimulus. To verify this intuition we will look for
stimuli with highly variant distributions of maker gender categories.
(e.g. largely female or other,rather than male)
rm(df, A, B, S, H, I, MC, p)
rm(desc.gender_conf)
#FILTER DATASET
df <- df_tools %>% filter( str_detect(STIMULUS, "B2"))
#TOOL ID BY STIMULUS AND SAMPLE
## FACETED HORIZONTAL BAR CHART
ggplot(data = df, aes( fill = fct_rev(TOOL_ID), x = fct_rev(STIMULUS) )) +
geom_bar(position = "stack") +
coord_flip() +
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") +
scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
# scale_y_continuous(labels = scales::percent) +
labs( title = "TOOL ID by Stimulus (grouped by Category)",
subtitle = "", x = "") +
easy_add_legend_title("TOOL_ID") +
theme_minimal()
#TOOL ID BY STIMULUS
## HORIZONTAL BAR CHART
ggplot( df, aes( x = fct_rev(STIMULUS), fill = fct_rev(TOOL_ID))) +
geom_bar(position = "stack") +
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ ., scales = "free", space = "free") +
labs( title = "TOOL ID by Stimulus",
subtitle = "", x = "") +
scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
coord_flip()+
easy_add_legend_title("") +
theme_minimal()
#TOOL ID GENDER BY STIMULUS
## GGSTATSPLOT
ggbarstats( data = df, x = "TOOL_ID", y = "STIMULUS",
legend.title = "TOOL ID") +
scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
labs( title = "TOOL ID BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
# FILTER DATAFRAME
df <- df_tools %>% filter( str_detect(STIMULUS, "B2"))
######### TOOL ID AND CONFIDENCE ##############
a <- ggplot (df, aes( x = (STIMULUS), fill = fct_rev(TOOL_ID))) +
geom_bar(position = "fill", width = 0.8) +
scale_fill_paletteer_d("awtools::a_palette", direction = 1) +
# facet_grid( df$ENCOUNTER) +
labs (x = "") +
easy_add_legend_title("TOOL ID")+
theme_minimal()
# TOOL_CONFIDENCE by STIMULUS
b <- ggplot(df, aes( x = (STIMULUS), y = TOOL_CONF, color = fct_rev(TOOL_ID))) +
geom_boxplot(position=position_dodge(0.9), width = 0.6)+
geom_jitter(position=position_jitterdodge(), alpha = 0.2) +
scale_color_paletteer_d("awtools::a_palette", direction = 1) +
labs (x = "STIMULUS") +
easy_add_legend_title("TOOL ID")+
theme_minimal()
# TOOL_CONFIDENCE
c <- ggplot(df, aes( x = (STIMULUS), y = TOOL_CONF)) +
geom_boxplot(position=position_dodge(0.9), width = 0.5)+
geom_jitter(width = 0.10, alpha = 0.4) +
labs (title = "") +
labs (x = "") +
theme_minimal()
(p <- (a / b / c ) + plot_annotation(
title = 'TOOL ID & CONFIDENCE by Stimulus',
subtitle = '',
caption = ''))
###################################################
rm(a,b,c,p)
#FILTER DATASET
df <- df_graphs %>% filter(Assigned.Block == 2)
# ENCOUNTER CHOICE BY STIMULUS AND SAMPLE
ggplot( df, aes( x = (STIMULUS), fill = ENCOUNTER)) +
geom_bar(position = "stack") +
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") +
labs( title = "MAKER ENCOUNTER by Stimulus",
subtitle = "", x = "") +
scale_fill_brewer(palette = "Dark2") +
coord_flip()+
# easy_add_legend_title("") +
theme_minimal()
#ENCOUNTER CHOICE
## HORIZONTAL BAR CHART
ggplot( df, aes( x = fct_rev(STIMULUS), fill = (ENCOUNTER))) +
geom_bar(position = "stack") +
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ ., scales = "free", space = "free") +
labs( title = "ENCOUNTER CHOICE by Stimulus",
subtitle = "", x = "") +
scale_fill_brewer(palette = "Dark2") +
coord_flip()+
easy_add_legend_title("") +
theme_minimal()
#ENCOUNTER CHOICE
## GGSTATSPLOT
ggbarstats( data = df, x = "ENCOUNTER", y = "STIMULUS",
legend.title = "ENCOUNTER") +
scale_fill_brewer(palette = "Dark2") +
labs( title = "ENCOUNTER BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
#FILTER DATASET
df <- df_actions %>% filter( str_detect(STIMULUS, "B2"))
# ACTION CHOICE BY STIMULUS AND SAMPLE
ggplot(data = df, aes( fill = CHART_ACTION, x = fct_rev(STIMULUS) )) +
geom_bar(position = "stack") +
coord_flip() +
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ Distribution, scales = "free", space = "free") +
scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
# scale_y_continuous(labels = scales::percent) +
labs( title = "Chart Action by Stimulus",
subtitle = "", x = "") +
easy_add_legend_title("ACTION") +
theme_minimal()
# ACTION CHOICE BY STIMULUS
ggplot(data = df, aes( fill = CHART_ACTION, x = fct_rev(STIMULUS) )) +
geom_bar(position = "stack") +
coord_flip() +
facet_grid(fct_rev(STIMULUS_CATEGORY) ~ ., scales = "free", space = "free") +
scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
# scale_y_continuous(labels = scales::percent) +
labs( title = "Chart Action by Stimulus",
subtitle = "", x = "") +
easy_add_legend_title("ACTION") +
theme_minimal()
#ACTION CHOICE CHOICE BY STIMULUS
## GGSTATSPLOT
ggbarstats( data = df, x = "CHART_ACTION", y = "STIMULUS",
legend.title = "CHART ACTION") +
scale_fill_paletteer_d("awtools::a_palette", direction = 1) +
labs( title = "CHART ACTION BY STIMULUS ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
ie. mixed model, but also look at stability of individual choice across stimuli maybe treat individual as fixed factor and look at amount of variance explained?
revisit ggwithin and grouped within, which require a continuous outcome
# ggwithinstats(
# data = df %>% filter(STIMULUS %in% c("B2-1","B2-2")),
# x = STIMULUS,
# y = MAKER_ID,
# type = "robust", ## type of statistical test
# xlab = "MAKER_ID", ## label for the x-axis
# ylab = "MAKER_CONF", ## label for the y-axis
# # package = "yarrr", ## package from which color palette is to be taken
# # palette = "info2", ## choosing a different color palette
# title = "MAKER ID BY MAKER_CONF"
# )
df <- df_graphs
qacBase::qstats(df, CHART_BEAUTY, STIMULUS) %>% arrange(mean)
## STIMULUS n mean sd
## 1 B5-1 53 27.23 22.54
## 2 B4-2 54 28.24 26.07
## 3 B3-1 52 28.56 22.96
## 4 B3-3 52 31.54 27.19
## 5 B2-2 52 31.73 26.11
## 6 B1-3 55 38.13 32.33
## 7 B6-4 52 40.12 29.57
## 8 B1-4 55 45.18 28.97
## 9 B1-2 55 46.09 25.28
## 10 B6-2 52 46.12 27.68
## 11 B5-2 53 49.17 22.59
## 12 B0-0 318 49.67 26.95
## 13 B5-3 53 50.19 27.54
## 14 B4-1 54 53.09 27.92
## 15 B2-1 52 55.13 25.80
## 16 B1-1 55 55.75 29.37
## 17 B6-1 52 56.06 24.03
## 18 B3-2 52 57.25 25.32
## 19 B5-4 53 57.81 26.61
## 20 B2-4 52 61.62 26.60
## 21 B4-4 54 61.70 28.12
## 22 B6-3 52 62.19 27.48
## 23 B3-4 52 63.60 27.31
## 24 B4-3 54 66.56 20.81
## 25 B2-3 52 74.23 24.85
qacBase::qstats(df, CHART_BEAUTY, STIMULUS_CATEGORY) %>% arrange(mean)
## STIMULUS_CATEGORY n mean sd
## 1 B 318 43.05 27.26
## 2 A 318 46.05 28.45
## 3 F 318 49.67 26.95
## 4 C 318 53.73 30.80
## 5 D 318 54.96 29.11
#
#
# ggplot(df, aes(x = CHART_BEAUTY)) +
# geom_histogram(bins = 20) +
# facet_grid(BLOCK ~ STIMULUS_CATEGORY, scales = "free", space = "free", drop=TRUE) +
# theme_minimal()
#
#
# ggplot(df, aes(x = CHART_BEAUTY)) +
# geom_histogram(bins = 20) +
# facet_grid( ~ fct_rev(STIMULUS_CATEGORY), scales = "free", space = "free", drop=TRUE) +
# theme_minimal()
#
##ggstatsplot BY CATEGORY
grouped_gghistostats(
data = df_graphs %>% filter(STIMULUS != "B0-0"),
x = CHART_BEAUTY, ## same outcome variable
grouping.var = STIMULUS, ## grouping variable males = 1, females = 2
type = "robust", ## robust test: one-sample percentile bootstrap
test.value = 50, ## test value against which sample mean is to be compared
centrality.line.args = list(color = "#D55E00", linetype = "dashed"),
# ggtheme = ggthemes::theme_stata(), ## changing default theme
## turn off ggstatsplot theme layer
## arguments relevant for combine_plots
annotation.args = list(
title = "DISTRIBUTION of Chart Beauty by Cateogry",
caption = ""
),
plotgrid.args = list(nrow = 6)
)
#B0-0
gghistostats(
data = df_graphs %>% filter(STIMULUS == "B0-0"), ## data from which variable is to be taken
x = CHART_BEAUTY, ## numeric variable
xlab = "CHART BEAUTY", ## x-axis label
title = "B0-0 MILLENIAL PINK PLANTS", ## title for the plot
test.value = 50, ## test value
caption = ""
)
df <- df_graphs
qacBase::qstats(df, CHART_TRUST, STIMULUS) %>% arrange(mean)
## STIMULUS n mean sd
## 1 B1-3 55 30.98 27.33
## 2 B3-3 52 39.92 27.16
## 3 B1-4 55 45.45 23.37
## 4 B2-4 52 47.94 26.74
## 5 B5-3 53 49.15 24.83
## 6 B4-2 54 50.50 21.10
## 7 B0-0 318 50.79 20.05
## 8 B6-4 52 52.00 22.13
## 9 B5-4 53 53.68 21.04
## 10 B6-3 52 54.37 21.55
## 11 B2-2 52 54.52 22.15
## 12 B4-4 54 55.61 24.54
## 13 B6-1 52 56.52 19.00
## 14 B1-2 55 56.98 22.19
## 15 B6-2 52 57.50 21.89
## 16 B1-1 55 58.04 22.54
## 17 B3-2 52 58.52 18.56
## 18 B3-4 52 59.00 20.68
## 19 B5-1 53 61.77 23.68
## 20 B5-2 53 62.36 20.57
## 21 B2-1 52 62.44 20.45
## 22 B3-1 52 63.31 16.86
## 23 B2-3 52 66.00 22.10
## 24 B4-1 54 66.24 22.82
## 25 B4-3 54 72.48 19.94
qacBase::qstats(df, CHART_TRUST, STIMULUS_CATEGORY) %>% arrange(mean)
## STIMULUS_CATEGORY n mean sd
## 1 F 318 50.79 20.05
## 2 C 318 52.07 27.81
## 3 D 318 52.24 23.45
## 4 B 318 56.71 21.27
## 5 A 318 61.39 21.15
##ggstatsplot BY CATEGORY
grouped_gghistostats(
data = df_graphs %>% filter(STIMULUS != "B0-0"),
x = CHART_TRUST, ## same outcome variable
grouping.var = STIMULUS, ## grouping variable males = 1, females = 2
type = "robust", ## robust test: one-sample percentile bootstrap
test.value = 50, ## test value against which sample mean is to be compared
centrality.line.args = list(color = "#D55E00", linetype = "dashed"),
# ggtheme = ggthemes::theme_stata(), ## changing default theme
## turn off ggstatsplot theme layer
## arguments relevant for combine_plots
annotation.args = list(
title = "DISTRIBUTION of Chart TRUST by Cateogry",
caption = ""
),
plotgrid.args = list(nrow = 6)
)
#B0-0
gghistostats(
data = df_graphs %>% filter(STIMULUS == "B0-0"), ## data from which variable is to be taken
x = CHART_TRUST, ## numeric variable
xlab = "CHART TRUST", ## x-axis label
title = "B0-0 MILLENIAL PINK PLANTS", ## title for the plot
test.value = 50, ## test value
caption = ""
)
wip code stash
#### DEFINE SET
stimulus = "B2-1"
df <- df_graphs %>% filter(STIMULUS == stimulus)
#### GENERATE GRAPHS
#MAKER_ID-DONUT
PieChart(MAKER_ID, data = df,
fill = "reds",
main = paste0(stimulus, " MAKER ID")) + theme_minimal()
## >>> suggestions
## PieChart(MAKER_ID, hole=0) # traditional pie chart
## PieChart(MAKER_ID, values="%") # display %'s on the chart
## PieChart(MAKER_ID) # bar chart
## Plot(MAKER_ID) # bubble plot
## Plot(MAKER_ID, values="count") # lollipop plot
##
## --- MAKER_ID ---
##
## MAKER_ID Count Prop
## -------------------------
## individual 0 0.000
## organization 1 0.019
## news 11 0.212
## education 19 0.365
## political 4 0.077
## business 17 0.327
## -------------------------
## Total 52 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 38.923, df = 5, p-value = 0.000
## NULL
# #MAKER_GENDER-DONUT
# PieChart(MAKER_GENDER, data = df,
# fill = "blues",
# main = paste0(stimulus, " MAKER GENDER")) + theme_minimal()
#
#
# #MAKER_AGE-DONUT
# PieChart(MAKER_AGE, data = df,
# fill = "olives",
# main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#MAKER_AGE-DONUT
PieChart(MAKER_ID, data = df,
fill = "rusts",
main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
## >>> suggestions
## PieChart(MAKER_ID, hole=0) # traditional pie chart
## PieChart(MAKER_ID, values="%") # display %'s on the chart
## PieChart(MAKER_ID) # bar chart
## Plot(MAKER_ID) # bubble plot
## Plot(MAKER_ID, values="count") # lollipop plot
##
## --- MAKER_ID ---
##
## MAKER_ID Count Prop
## -------------------------
## individual 0 0.000
## organization 1 0.019
## news 11 0.212
## education 19 0.365
## political 4 0.077
## business 17 0.327
## -------------------------
## Total 52 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 38.923, df = 5, p-value = 0.000
## NULL
#MAKER_AGE-DONUT
PieChart(MAKER_ID, data = df,
fill = "olives",
main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
## >>> suggestions
## PieChart(MAKER_ID, hole=0) # traditional pie chart
## PieChart(MAKER_ID, values="%") # display %'s on the chart
## PieChart(MAKER_ID) # bar chart
## Plot(MAKER_ID) # bubble plot
## Plot(MAKER_ID, values="count") # lollipop plot
##
## --- MAKER_ID ---
##
## MAKER_ID Count Prop
## -------------------------
## individual 0 0.000
## organization 1 0.019
## news 11 0.212
## education 19 0.365
## political 4 0.077
## business 17 0.327
## -------------------------
## Total 52 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 38.923, df = 5, p-value = 0.000
## NULL
#MAKER_AGE-DONUT
PieChart(MAKER_ID, data = df,
fill = "greens",
main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
## >>> suggestions
## PieChart(MAKER_ID, hole=0) # traditional pie chart
## PieChart(MAKER_ID, values="%") # display %'s on the chart
## PieChart(MAKER_ID) # bar chart
## Plot(MAKER_ID) # bubble plot
## Plot(MAKER_ID, values="count") # lollipop plot
##
## --- MAKER_ID ---
##
## MAKER_ID Count Prop
## -------------------------
## individual 0 0.000
## organization 1 0.019
## news 11 0.212
## education 19 0.365
## political 4 0.077
## business 17 0.327
## -------------------------
## Total 52 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 38.923, df = 5, p-value = 0.000
## NULL
#MAKER_AGE-DONUT
PieChart(MAKER_ID, data = df,
fill = "emeralds",
main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
## >>> suggestions
## PieChart(MAKER_ID, hole=0) # traditional pie chart
## PieChart(MAKER_ID, values="%") # display %'s on the chart
## PieChart(MAKER_ID) # bar chart
## Plot(MAKER_ID) # bubble plot
## Plot(MAKER_ID, values="count") # lollipop plot
##
## --- MAKER_ID ---
##
## MAKER_ID Count Prop
## -------------------------
## individual 0 0.000
## organization 1 0.019
## news 11 0.212
## education 19 0.365
## political 4 0.077
## business 17 0.327
## -------------------------
## Total 52 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 38.923, df = 5, p-value = 0.000
## NULL
#MAKER_AGE-DONUT
PieChart(MAKER_ID, data = df,
fill = "turquoises",
main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
## >>> suggestions
## PieChart(MAKER_ID, hole=0) # traditional pie chart
## PieChart(MAKER_ID, values="%") # display %'s on the chart
## PieChart(MAKER_ID) # bar chart
## Plot(MAKER_ID) # bubble plot
## Plot(MAKER_ID, values="count") # lollipop plot
##
## --- MAKER_ID ---
##
## MAKER_ID Count Prop
## -------------------------
## individual 0 0.000
## organization 1 0.019
## news 11 0.212
## education 19 0.365
## political 4 0.077
## business 17 0.327
## -------------------------
## Total 52 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 38.923, df = 5, p-value = 0.000
## NULL
#MAKER_AGE-DONUT
PieChart(MAKER_ID, data = df,
fill = "aquas",
main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
## >>> suggestions
## PieChart(MAKER_ID, hole=0) # traditional pie chart
## PieChart(MAKER_ID, values="%") # display %'s on the chart
## PieChart(MAKER_ID) # bar chart
## Plot(MAKER_ID) # bubble plot
## Plot(MAKER_ID, values="count") # lollipop plot
##
## --- MAKER_ID ---
##
## MAKER_ID Count Prop
## -------------------------
## individual 0 0.000
## organization 1 0.019
## news 11 0.212
## education 19 0.365
## political 4 0.077
## business 17 0.327
## -------------------------
## Total 52 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 38.923, df = 5, p-value = 0.000
## NULL
#MAKER_AGE-MAKER_ID
PieChart(MAKER_ID, data = df,
fill = "purples",
main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
## >>> suggestions
## PieChart(MAKER_ID, hole=0) # traditional pie chart
## PieChart(MAKER_ID, values="%") # display %'s on the chart
## PieChart(MAKER_ID) # bar chart
## Plot(MAKER_ID) # bubble plot
## Plot(MAKER_ID, values="count") # lollipop plot
##
## --- MAKER_ID ---
##
## MAKER_ID Count Prop
## -------------------------
## individual 0 0.000
## organization 1 0.019
## news 11 0.212
## education 19 0.365
## political 4 0.077
## business 17 0.327
## -------------------------
## Total 52 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 38.923, df = 5, p-value = 0.000
## NULL
#MAKER_AGE-DONUT
PieChart(MAKER_ID, data = df,
fill = "magentas",
main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
## >>> suggestions
## PieChart(MAKER_ID, hole=0) # traditional pie chart
## PieChart(MAKER_ID, values="%") # display %'s on the chart
## PieChart(MAKER_ID) # bar chart
## Plot(MAKER_ID) # bubble plot
## Plot(MAKER_ID, values="count") # lollipop plot
##
## --- MAKER_ID ---
##
## MAKER_ID Count Prop
## -------------------------
## individual 0 0.000
## organization 1 0.019
## news 11 0.212
## education 19 0.365
## political 4 0.077
## business 17 0.327
## -------------------------
## Total 52 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 38.923, df = 5, p-value = 0.000
## NULL
#MAKER_AGE-DONUT
PieChart(MAKER_ID, data = df,
fill = "violets",
main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
## >>> suggestions
## PieChart(MAKER_ID, hole=0) # traditional pie chart
## PieChart(MAKER_ID, values="%") # display %'s on the chart
## PieChart(MAKER_ID) # bar chart
## Plot(MAKER_ID) # bubble plot
## Plot(MAKER_ID, values="count") # lollipop plot
##
## --- MAKER_ID ---
##
## MAKER_ID Count Prop
## -------------------------
## individual 0 0.000
## organization 1 0.019
## news 11 0.212
## education 19 0.365
## political 4 0.077
## business 17 0.327
## -------------------------
## Total 52 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 38.923, df = 5, p-value = 0.000
## NULL
#MAKER_AGE-DONUT
PieChart(MAKER_ID, data = df,
fill = "grays",
main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
## >>> suggestions
## PieChart(MAKER_ID, hole=0) # traditional pie chart
## PieChart(MAKER_ID, values="%") # display %'s on the chart
## PieChart(MAKER_ID) # bar chart
## Plot(MAKER_ID) # bubble plot
## Plot(MAKER_ID, values="count") # lollipop plot
##
## --- MAKER_ID ---
##
## MAKER_ID Count Prop
## -------------------------
## individual 0 0.000
## organization 1 0.019
## news 11 0.212
## education 19 0.365
## political 4 0.077
## business 17 0.327
## -------------------------
## Total 52 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 38.923, df = 5, p-value = 0.000
## NULL
# "reds" h 0
# "rusts" h 30
# "browns" h 60
# "olives" h 90
# "greens" h 120
# "emeralds" h 150
# "turquoises" h 180
# "aquas" h 210
# "blues" h 240
# "purples" h 270
# "violets" h 300
# "magentas" h 330
# "grays"
## EXAMPLE ALLUVIAL PLOT USING GGALUVIAL (instead of GGSANKEY)
# https://corybrunson.github.io/ggalluvial/articles/ggalluvial.html
# #FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
# ds <- df_graphs %>%
# filter(str_detect(STIMULUS, "B2")) %>%
# select(STIMULUS, MAKER_ID, PID) %>%
# mutate(
# MAKER_ID = fct_relevel(MAKER_ID,
# c("business","education","individual", "news","organization", "political" ))
# )
#
# ds %>%
# ggplot(aes( x = STIMULUS,
# stratum = MAKER_ID,
# label = MAKER_ID,
# alluvium = PID)) +
# stat_alluvium(aes(fill = MAKER_ID),
# width = 0,
# alpha = 1,
# geom = "flow")+
# geom_stratum(width = 0.2, aes(fill= MAKER_ID))+
# # geom_text(stat = "stratum", size = 5, angle = 90)+
# scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE,
# alpha = 1) +
# theme_minimal()